#' Download every page of a paginated JSON endpoint.
#'
#' Requests `url` repeatedly with `$order`, `$limit` and `$offset` query
#' parameters (ordered by `zipcode`) and stops after the first page whose
#' row count differs from `chunk_size`.
#'
#' @param url URL of the JSON endpoint to query.
#' @param chunk_size Number of rows requested per page.
#' @return A list of tibbles, one per page, in request order.
get_all_inspections <- function(url, chunk_size = 50000) {
  # Integer values keep large limits/offsets from being written in
  # scientific notation (e.g. "1e+05") if they are converted to text.
  chunk_size <- as.integer(chunk_size)
  stopifnot(length(chunk_size) == 1L, !is.na(chunk_size), chunk_size > 0L)

  all_inspections <- list()
  loop_index <- 1L
  repeat {
    message("Getting data, page ", loop_index)
    response <- GET(
      url,
      query = list(
        `$order` = "zipcode",
        `$limit` = chunk_size,
        `$offset` = (loop_index - 1L) * chunk_size
      )
    )
    # Fail loudly on an HTTP error instead of parsing the error body as
    # if it were a page of data.
    httr::stop_for_status(response)

    page <- response %>%
      content("text", encoding = "UTF-8") %>%
      fromJSON() %>%
      as_tibble()
    all_inspections[[loop_index]] <- page

    # A page that is not completely full is the last one.
    if (nrow(page) != chunk_size) {
      break
    }
    loop_index <- loop_index + 1L
  }
  all_inspections
}
# Endpoint queried for the inspection records.
url <- "https://data.cityofnewyork.us/resource/9w7m-hzhe.json"

# Download all pages, stack them into one table, tidy the column names,
# shorten two long cuisine labels, and derive a letter grade from the
# numeric score. Rows without a recorded `grade` are dropped at the end.
nyc_restaurant_inspections <- url %>%
  get_all_inspections() %>%
  bind_rows() %>%
  rename(restaurant_id = camis, restaurant_name = dba) %>%
  mutate(
    cuisine_description = case_when(
      cuisine_description == "Café/Coffee/Tea" ~ "Cafe/coffee",
      cuisine_description ==
        "Latin (Cuban, Dominican, Puerto Rican, South & Central American)" ~
        "Latin",
      TRUE ~ cuisine_description
    ),
    score = as.numeric(score),
    # Conditions are tried in order, so the second one only sees scores
    # that are already known to be >= 14; a missing score stays NA.
    grade_new = case_when(
      score < 14 ~ "A",
      score < 28 ~ "B",
      score >= 28 ~ "C"
    )
  ) %>%
  filter(!is.na(grade))
## Getting data, page 1
## Warning in strptime(x, fmt, tz = "GMT"): unknown timezone 'zone/tz/2017c.
## 1.0/zoneinfo/America/New_York'
## Getting data, page 2
## Getting data, page 3
## Getting data, page 4
## Getting data, page 5
## Getting data, page 6
## Getting data, page 7
## Getting data, page 8
Column
Chart A
# Chart A: stacked bar chart of A/B/C grade counts per cuisine type,
# limited to cuisines with at least 3000 graded rows.
nyc_restaurant_inspections %>%
  filter(grade_new %in% c("A", "B", "C")) %>%
  # Counting within cuisine groups keeps only the cuisine, the grade and
  # the count `n`, so no explicit column selection is needed first.
  group_by(cuisine_description) %>%
  count(grade_new) %>%
  # One row per cuisine with a column of counts for each grade.
  spread(key = grade_new, value = n) %>%
  mutate(total = rowSums(cbind(A, B, C), na.rm = TRUE)) %>%
  filter(total >= 3000) %>%
  plot_ly(x = ~cuisine_description, y = ~A, type = "bar", name = "A") %>%
  add_trace(y = ~B, name = "B") %>%
  add_trace(y = ~C, name = "C") %>%
  layout(
    title = "Count of restaurant grades by cuisine type",
    barmode = "stack",
    margin = list(b = 75),
    xaxis = list(title = "", tickangle = 45),
    yaxis = list(title = "Count")
  )
Column
Chart B
# Chart B: number of distinct restaurants per zip code in Manhattan,
# drawn as one marker per zip code.
nyc_restaurant_inspections %>%
  filter(boro == "MANHATTAN") %>%
  mutate(zipcode = as.character(zipcode)) %>%
  # The table presumably holds several rows per restaurant (one per
  # inspection record), so reduce it to unique restaurants before
  # counting; otherwise the "restaurant" count is really a row count and
  # every row is plotted as its own overlapping point.
  distinct(boro, zipcode, restaurant_id) %>%
  count(boro, zipcode) %>%
  rename(num_restaurants_man = n) %>%
  mutate(
    text_label = stringr::str_c(
      "Zipcode:", zipcode, " Num_restaurants:", num_restaurants_man
    )
  ) %>%
  # "markers" is the valid scatter mode name ("marker" is not).
  plot_ly(
    x = ~zipcode,
    y = ~num_restaurants_man,
    color = ~boro,
    type = "scatter",
    mode = "markers",
    text = ~text_label
  ) %>%
  layout(
    title = "Count of restaurants in each zip code in Manhattan",
    margin = list(b = 75)
  )
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
Chart C
# Chart C: box plot of scores for rows graded "A" by score, one box per
# borough. Rows with borough "Missing" or a score of 0 or less are left out.
nyc_restaurant_inspections %>%
  filter(grade_new == "A", boro != "Missing", score > 0) %>%
  # Only the plotted columns need to be complete. A bare drop_na() would
  # also discard rows that are merely missing some unrelated column and
  # so distort the distribution.
  drop_na(score, boro) %>%
  plot_ly(y = ~score, color = ~boro, type = "box") %>%
  layout(
    title = "Distribution of scores within A-graded restaurants, by borough"
  )